from docx import Document
import os
import re


# Skill keywords recognised in resume text, compiled once at import time.
#
# ASCII-only lookarounds replace ``\b`` for two reasons:
#   * ``\b`` is Unicode-aware for str patterns, and CJK characters count as
#     word characters, so a keyword written directly next to Chinese text
#     (e.g. "熟悉Python和Java") has no word boundary around it.
#   * ``C\+\+\b`` needs a word character right after the second "+", so a
#     plain "C++" followed by a space, punctuation or end of text never matched.
# "C++" therefore carries no trailing constraint (so "C++11" still matches, as
# before), while the alphabetic keywords must not be followed by an ASCII
# word character (so "JavaScript" or "GitHub" are not reported).
SKILL_PATTERN = re.compile(
    r'(?<![A-Za-z0-9_])'
    r'(?:C\+\+|(?:Python|Java|MySQL|MongoDB|Git|Docker)(?![A-Za-z0-9_]))'
)


def extract_skills(file_path_resume):
    """Return the skill keywords mentioned in a .docx resume.

    Args:
        file_path_resume: Path of the .docx file, passed to ``Document``.

    Returns:
        A list of matched keywords in order of appearance. A keyword that
        occurs several times is listed once per occurrence; the list is
        empty when nothing matches.

    NOTE(review): only ``document.paragraphs`` is scanned; text placed in
    tables, headers or footers is presumably not covered -- confirm against
    the python-docx documentation if resumes use table layouts.
    """
    document = Document(file_path_resume)
    skill_list = []

    # Collect the matches of every paragraph, keeping document order.
    for paragraph in document.paragraphs:
        skill_list.extend(SKILL_PATTERN.findall(paragraph.text))

    return skill_list


# Folder that holds the resume documents (relative to the working directory).
resume_folder = 'resume_list'

# Collect the resume files in the folder, in a stable (sorted) order.
# Names starting with "~$" are the owner/lock files Word creates next to an
# open document: they carry the .docx extension but are not valid packages,
# so trying to open one would abort the whole run. The extension check is
# case-insensitive so that files named "*.DOCX" are picked up as well.
resume_files = sorted(
    f for f in os.listdir(resume_folder)
    if f.lower().endswith('.docx') and not f.startswith('~$')
)

# Map each resume file name to the list of skill keywords found in it.
all_skills = {}

for resume_file in resume_files:
    file_path = os.path.join(resume_folder, resume_file)
    skills = extract_skills(file_path)
    all_skills[resume_file] = skills

# Print the skill keywords of every resume, one section per file.
for resume_file, skills in all_skills.items():
    print(f'简历文件: {resume_file}')
    print(f'技能关键词: {", ".join(skills)}')
    print('-' * 40)
